
# A2b_price_index_agg_full_adj
#==============================================================================

# Description: This code computes the difference in the gains from trade between
# poor and rich on average on aggregate in the full model

rm(list = ls())
options(warn=-1)

setwd("D:/data_replication")

library('data.table')
library('stargazer')
library('ggplot2')
library('ggrepel')


# Import and format list of products for each country
#===============================================================================

product_id <- fread("estimation/2_product_list/product_id_declarant.csv")

pc8plus_i_k <- fread("estimation/4_demand_estimation/pc8plus_i_k.csv")
names(pc8plus_i_k) <- c("product_id", "V1", "k")
pc8plus_i_k <- pc8plus_i_k[ , .(product_id, k)]
pc8plus_i_k[ , product_id := as.integer(product_id)]

k_to_i_k <- fread("estimation/5_supply_side/k_to_i_k.csv")

product_id <- merge(product_id, pc8plus_i_k, by = "product_id", all = T)
product_id <- product_id[k != 0]
product_id <- product_id[ , .(declarant, k)]

product_id <- merge(product_id, k_to_i_k, by = "k", all = T)
product_id <- product_id[ , .(i_k, declarant)]

#===============================================================================


# Import and format product-specific price indexes
#===============================================================================

load('estimation/5_supply_side/data_PI_full_adj.RData')

intermediate_data <- fread('data/intermediates/intermediate_i_k.csv')
setnames(intermediate_data, "i_k", "product") 

data <- merge(data, intermediate_data, by = "product", all = T) 
data <- data[intermediate_i_k == 0]

data <- data[beta_price_20 < 0]
data <- data[beta_price_80 < 0]

data[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

# Create difference between poor and rich
#-------------------------------------------------------------------------------

data[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]

#===============================================================================


# Merge in Cobb-Douglas Weights
#===============================================================================

setnames(product_id, "declarant", "Declarant")
setnames(product_id, "i_k", "product")
product_id[ , index_i_k := 1]
data = merge(data, product_id, by = c("product", "Declarant"), all = T)
data <- data[index_i_k == 1]


w = read.csv("estimation/4_demand_estimation/4_cobb_douglas_weights/weights_i_k.csv")
colnames(w)[1:4] = c("product", "Year", "Quarter", "Declarant")
data = merge(data, w, by = c('product', 'Year', 'Quarter', 'Declarant'))
data = data[s_20_i_k != 0]
data = data[s_80_i_k != 0]

#===============================================================================


# Construct Price Index
#===============================================================================

# Remove extreme observations
#-------------------------------------------------------------------------------

data_PI = data
data_PI = data_PI[diff < 10 & diff > -10]
data_PI = data_PI[(is.na(u_beta_counter_80) | is.na(u_beta_counter_20) | is.na(u_beta_true_80) | is.na(u_beta_true_20)) == F]
data_PI = data_PI[(abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]
data_PI = data_PI[index_mc == F]                                                # Remove cases in which demand is not sufficiently elastic 

data_PI[ , v_true_20 := beta_price_20 * log(P_true_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_counter_20 := beta_price_20 * log(P_counter_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_true_80 := beta_price_80 * log(P_true_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]
data_PI[ , v_counter_80 := beta_price_80 * log(P_counter_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]

data_PI = data_PI[(v_true_20 == Inf | v_counter_20 == Inf | v_true_80 == Inf | v_counter_80 == Inf) == F]
data_PI = data_PI[(v_true_20 == -Inf | v_counter_20 == -Inf | v_true_80 == -Inf | v_counter_80 == -Inf) == F]

load("estimation/5_supply_side/product_id.RData")
colnames(product_id) = c("product", "Declarant")
product_id[ , idx_PI := 1]
data_PI = merge(data_PI, product_id, by = c('product', 'Declarant'), all = T)
data_PI <- data_PI[is.na(Year) == F]
data_PI <- data_PI[idx_PI == 1]

# Remove largest categories and readjust weights
#-------------------------------------------------------------------------------

setkey(data_PI, s_80_i_k)

data_PI <- data_PI[, .SD[((s_80_i_k < quantile(s_80_i_k, probs = 0.99)) & (s_20_i_k < quantile(s_20_i_k, probs = 0.99)))], by = 'Declarant']

data_PI[ , sum_w := sum(s_20_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_20_i_k := s_20_i_k / sum_w]

data_PI[ , sum_w := sum(s_80_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_80_i_k := s_80_i_k / sum_w]

# Compute Price Index
#-------------------------------------------------------------------------------

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_20_i_k*log(s_20_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_true_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_true_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_counter_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_counter_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , diff := PI_counter_20 / PI_true_20 - PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , gft_20 := PI_counter_20 / PI_true_20]
data_PI <- data_PI[ , gft_80 := PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , .SD[1], by = c('Year', 'Quarter', 'Declarant')]

#===============================================================================


# Run Regression, save, and plot
#===============================================================================

Reg_P_summary <- lm(diff ~ 0 + as.factor(Declarant), data = data_PI)

# Format Data to Save Regression Output

declarant <- as.matrix(Reg_P_summary$coefficients)
declarant <- gsub("as.factor\\(Declarant\\)","",rownames(declarant))
declarant <- as.numeric(declarant)
income_data <- fread(file = "estimation/5_supply_side/gdp_per_capita_2007.csv")

# Save Regression

output <- merge(income_data, data.table(declarant, Reg_P_summary$coefficients), by = "declarant")
colnames(output) <- c(colnames(output)[1:4], "FE_declarant")
write.csv(file = "robustness/excl_intermediates/price_index/results/diff_PI_full_adj.csv", output)


# Plot
#-------------------------------------------------------------------------------

output <- fread('robustness/excl_intermediates/price_index/results/diff_PI_full_adj.csv')

iso3 <- fread('estimation/5_supply_side/ISO3_declarant_codes.csv')

output[ , gdp_log := log(gdp_per_capita_declarant)]
setkey(output, declarant)
setkey(iso3, declarant)

output <- output[iso3]
output <- output[is.na(country) == F]
output = output[declarant != 18] 
output = output[declarant != 46] 
output = output[declarant != 600] 


reg <- lm(FE_declarant ~ gdp_log, data = output)
summary(reg)
cor(output$FE_declarant, output$gdp_log)

plot_diff_full_adj <- ggplot(output, aes(x=gdp_log, y=FE_declarant)) +
  geom_text_repel(label=output$ISO3, size = 5) +
  theme(text = element_text(size=17)) +
  geom_smooth(method=lm) +
  labs(x = "GDP per capita (in logs)", y = expression(paste(Delta, "Welfare"["Poor"]," - ", Delta, "Welfare"["Rich"]))) 

ggsave("robustness/excl_intermediates/price_index/results/plot_PI_full_adj.png", plot = plot_diff_full_adj)




